{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os \n",
    "import sys\n",
    "import numpy as np\n",
    "from scipy.spatial.transform import Rotation as R \n",
    "import rospy \n",
    "import rospkg\n",
    "import jupyros as jr\n",
    "\n",
    "from std_msgs.msg import String, Header\n",
    "from geometry_msgs.msg import PoseStamped, Pose, Point, Quaternion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Failed to import pyassimp, see https://github.com/ros-planning/moveit/issues/86 for more info\n",
      "Jupyter environment detected. Enabling Open3D WebVisualizer.\n",
      "[Open3D INFO] WebRTC GUI backend enabled.\n",
      "[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package punkt to /home/junting/nltk_data...\n",
      "[nltk_data]   Package punkt is already up-to-date!\n",
      "[nltk_data] Downloading package averaged_perceptron_tagger to\n",
      "[nltk_data]     /home/junting/nltk_data...\n",
      "[nltk_data]   Package averaged_perceptron_tagger is already up-to-\n",
      "[nltk_data]       date!\n"
     ]
    }
   ],
   "source": [
    "# Make the Franka catkin workspace's Python packages importable.\n",
    "# The location defaults to the original developer path and can be overridden\n",
    "# on other machines through the FRANKA_WS_SITE_PACKAGES environment variable.\n",
    "franka_site_packages = os.environ.get(\n",
    "    \"FRANKA_WS_SITE_PACKAGES\",\n",
    "    \"/home/junting/franka_ws/devel/lib/python3.9/site-packages\",\n",
    ")\n",
    "# Guard the append so re-running this cell does not stack duplicate entries.\n",
    "if franka_site_packages not in sys.path:\n",
    "    sys.path.append(franka_site_packages)\n",
    "\n",
    "# NOTE(review): the wildcard import hides which names the notebook relies on;\n",
    "# it is kept unchanged because src.lmp's exported names are not visible here.\n",
    "from src.lmp import *\n",
    "from src.env.multimodal_env import MultiModalEnv\n",
    "from src.configs.config import load_config\n",
    "\n",
    "# Load the configuration that is passed to MultiModalEnv when the environment is built.\n",
    "cfg_tabletop = load_config(\"perception_few_shot_gpt_3.5.yaml\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[DEBUG] [1701957516.116330, 0.000000]: init_node, name[/jupyter_multimodal], pid[2597332]\n",
      "[DEBUG] [1701957516.116937, 0.000000]: binding to 0.0.0.0 0\n",
      "[DEBUG] [1701957516.117344, 0.000000]: bound to 0.0.0.0 33435\n",
      "[DEBUG] [1701957516.117496, 0.000000]: connecting to chao-desktop 43343\n",
      "[DEBUG] [1701957516.118189, 0.000000]: ... service URL is rosrpc://chao-desktop:33435\n",
      "[DEBUG] [1701957516.118583, 0.000000]: [/jupyter_multimodal/get_loggers]: new Service instance\n",
      "[DEBUG] [1701957516.119445, 5454.893000]: ... service URL is rosrpc://chao-desktop:33435\n",
      "[DEBUG] [1701957516.120003, 5454.893000]: [/jupyter_multimodal/set_logger_level]: new Service instance\n",
      "[DEBUG] [1701957516.158593, 5454.932000]: connecting to chao-desktop 43343\n",
      "[DEBUG] [1701957516.159402, 5454.933000]: connecting to chao-desktop 43343\n",
      "[DEBUG] [1701957516.160362, 5454.934000]: connecting to chao-desktop 57687\n",
      "[INFO] [1701957516.161578, 5454.935000]: camera_left: Waiting for camera_left/color/camera_info...\n",
      "[DEBUG] [1701957516.162538, 5454.936000]: connecting to chao-desktop 43343\n",
      "[DEBUG] [1701957516.162824, 5454.936000]: connecting to chao-desktop 57687\n",
      "[INFO] [1701957516.662135, 5455.436000]: camera_left: camera_left/color/camera_info received!\n",
      "[INFO] [1701957516.662900, 5455.436000]: camera_left: camera_left/depth/camera_info received!\n",
      "[DEBUG] [1701957516.665338, 5455.439000]: connecting to chao-desktop 43343\n",
      "[INFO] [1701957516.666429, 5455.440000]: camera_right: Waiting for camera_right/color/camera_info...\n",
      "[DEBUG] [1701957516.666606, 5455.440000]: connecting to chao-desktop 51601\n",
      "[DEBUG] [1701957516.668070, 5455.441000]: connecting to chao-desktop 51601\n",
      "[DEBUG] [1701957516.668360, 5455.442000]: connecting to chao-desktop 43343\n",
      "[INFO] [1701957517.167687, 5455.941000]: camera_right: camera_right/color/camera_info received!\n",
      "[INFO] [1701957517.168968, 5455.942000]: camera_right: camera_right/depth/camera_info received!\n",
      "[DEBUG] [1701957517.172085, 5455.945000]: connecting to chao-desktop 43343\n",
      "[DEBUG] [1701957517.173540, 5455.946000]: connecting to chao-desktop 44301\n",
      "[INFO] [1701957517.173779, 5455.946000]: camera_top: Waiting for camera_top/color/camera_info...\n",
      "[DEBUG] [1701957517.176310, 5455.949000]: connecting to chao-desktop 44301\n",
      "[DEBUG] [1701957517.176659, 5455.950000]: connecting to chao-desktop 43343\n",
      "[INFO] [1701957517.674031, 5456.447000]: camera_top: camera_top/color/camera_info received!\n",
      "[INFO] [1701957517.675482, 5456.448000]: camera_top: camera_top/depth/camera_info received!\n",
      "[DEBUG] [1701957517.678564, 5456.450000]: ... service URL is rosrpc://chao-desktop:33435\n",
      "[DEBUG] [1701957517.679269, 5456.452000]: [/jupyter_multimodal/tf2_frames]: new Service instance\n",
      "\u001b[0m[ INFO] [1701957517.690851502]: Loading robot model 'panda'...\u001b[0m\n",
      "[DEBUG] [1701957517.840825, 5456.613000]: connecting to ('chao-desktop', 34045)\n",
      "[DEBUG] [1701957517.844097, 5456.617000]: connecting to chao-desktop 44561\n",
      "\u001b[0m[ INFO] [1701957519.111820338, 5457.884000000]: Ready to take commands for planning group panda_manipulator.\u001b[0m\n",
      "\u001b[0m[ INFO] [1701957519.491292958, 5458.263000000]: Ready to take commands for planning group panda_arm.\u001b[0m\n",
      "[DEBUG] [1701957519.497888, 5458.269000]: connecting to chao-desktop 54887\n",
      "[DEBUG] [1701957519.498725, 5458.271000]: connecting to chao-desktop 43343\n",
      "[DEBUG] [1701957519.499533, 5458.271000]: connecting to chao-desktop 42831\n",
      "[DEBUG] [1701957519.500103, 5458.272000]: connecting to chao-desktop 54887\n",
      "[DEBUG] [1701957519.501392, 5458.273000]: connecting to chao-desktop 44561\n",
      "[DEBUG] [1701957519.513048, 5458.285000]: connecting to chao-desktop 43343\n",
      "[DEBUG] [1701957519.522557, 5458.294000]: connecting to chao-desktop 43343\n",
      "[DEBUG] [1701957519.524101, 5458.296000]: connecting to chao-desktop 43343\n",
      "[DEBUG] [1701957519.736493, 5458.506000]: connecting to chao-desktop 43343\n",
      "[DEBUG] [1701957519.738575, 5458.508000]: connecting to chao-desktop 43343\n",
      "[DEBUG] [1701957519.738864, 5458.509000]: connecting to chao-desktop 43343\n",
      "[DEBUG] [1701957520.028979, 5458.799000]: connecting to chao-desktop 43343\n",
      "[DEBUG] [1701957520.030619, 5458.800000]: connecting to chao-desktop 43343\n",
      "[DEBUG] [1701957520.030877, 5458.800000]: connecting to chao-desktop 43343\n",
      "Gripper action clients ready\n",
      "Set up Franka API. Ready to go!\n",
      "[DEBUG] [1701957520.328585, 5459.098000]: connecting to ('chao-desktop', 37135)\n",
      "[INFO] [1701957520.329333, 5459.099000]: Grasp detection: remote model service ready\n",
      "VISION BACKBONE USE GRADIENT CHECKPOINTING:  False\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/junting/miniforge-pypy3/envs/ros_env/lib/python3.9/site-packages/torch/functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:3190.)\n",
      "  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LANGUAGE BACKBONE USE GRADIENT CHECKPOINTING:  False\n"
     ]
    }
   ],
   "source": [
    "########################################\n",
    "# Initialize the ROS node and the environment\n",
    "########################################\n",
    "# Register this kernel as the 'jupyter_multimodal' ROS node. The DEBUG log level\n",
    "# makes rospy print every connection/transition message into the cell outputs.\n",
    "rospy.init_node('jupyter_multimodal', log_level=rospy.DEBUG)\n",
    "# Resolve the on-disk path of the 'instruct_to_policy' ROS package.\n",
    "# NOTE(review): pkg_root is not referenced by any later cell in this notebook.\n",
    "pkg_root = rospkg.RosPack().get_path('instruct_to_policy')\n",
    "\n",
    "# Build the environment from the configuration loaded earlier (cfg_tabletop).\n",
    "env = MultiModalEnv(cfg_tabletop)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[DEBUG] [1701957527.541875, 5466.295000]: Transitioning to ACTIVE (from WAITING_FOR_GOAL_ACK, goal: /jupyter_multimodal-1-5466.295)\n",
      "[DEBUG] [1701957527.575748, 5466.329000]: Transitioning to WAITING_FOR_RESULT (from ACTIVE, goal: /jupyter_multimodal-1-5466.295)\n",
      "[DEBUG] [1701957527.577978, 5466.331000]: Transitioning to DONE (from WAITING_FOR_RESULT, goal: /jupyter_multimodal-1-5466.295)\n",
      "[DEBUG] [1701957527.684861, 5466.438000]: connecting to ('chao-desktop', 43343)\n",
      "[DEBUG] [1701957527.685981, 5466.439000]: connecting to ('chao-desktop', 43343)\n",
      "[DEBUG] [1701957527.686880, 5466.440000]: connecting to ('chao-desktop', 43343)\n",
      "[DEBUG] [1701957527.687809, 5466.441000]: connecting to chao-desktop 43343\n",
      "[DEBUG] [1701957527.689047, 5466.442000]: connecting to chao-desktop 43343\n",
      "[DEBUG] [1701957527.691583, 5466.443000]: connecting to chao-desktop 34045\n",
      "[DEBUG] [1701957527.692745, 5466.444000]: connecting to chao-desktop 34045\n",
      "[INFO] [1701957528.937402, 5467.647000]: Environment reset.\n"
     ]
    }
   ],
   "source": [
    "# Reset the environment before running perception and manipulation.\n",
    "env.reset()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/home/junting/catkin_ws/src/llm-manipulation-bench/instruct_to_policy/scripts\n"
     ]
    }
   ],
   "source": [
    "# Print the kernel's current working directory as a sanity check.\n",
    "# NOTE(review): the `src.*` imports presumably rely on the kernel running from the scripts directory - confirm.\n",
    "print(os.getcwd())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/junting/miniforge-pypy3/envs/ros_env/lib/python3.9/site-packages/transformers/modeling_utils.py:907: FutureWarning: The `device` argument is deprecated and will be removed in v5 of Transformers.\n",
      "  warnings.warn(\n",
      "/home/junting/miniforge-pypy3/envs/ros_env/lib/python3.9/site-packages/torch/nn/functional.py:4070: UserWarning: nn.functional.upsample_bilinear is deprecated. Use nn.functional.interpolate instead.\n",
      "  warnings.warn(\"nn.functional.upsample_bilinear is deprecated. Use nn.functional.interpolate instead.\")\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "inference time per image: 1.7238458859792445\n",
      "tensor([[851.1652, 536.1738, 901.7881, 590.0108]]) ['apple']\n",
      "inference time per image: 0.4822529189987108\n",
      "tensor([[1118.5205,  429.1591, 1327.5215,  542.5580]]) ['plate']\n",
      "inference time per image: 0.48819605400785804\n",
      "tensor([[ 874.4218,  591.7472, 1011.2419,  704.9487]]) ['bowl']\n",
      "inference time per image: 0.4839708429935854\n",
      "tensor([[866.5488, 414.8352, 909.0527, 459.0865]]) ['apple']\n",
      "inference time per image: 0.4816374459769577\n",
      "tensor([[ 720.1912,  529.7885, 1029.8835,  685.6276]]) ['plate']\n",
      "inference time per image: 0.4746538649778813\n",
      "tensor([[674.9188, 380.5370, 756.0240, 453.2359]]) ['bowl']\n",
      "inference time per image: 0.4884587050182745\n",
      "tensor([], size=(0, 4)) []\n",
      "inference time per image: 0.48455235300934874\n",
      "tensor([], size=(0, 4)) []\n",
      "inference time per image: 0.4839773509884253\n",
      "tensor([[1167.4608,  251.0338, 1257.0444,  341.5673]]) ['bowl']\n",
      "[DEBUG] [1701957535.782465, 5474.470000]: connecting to chao-desktop 34045\n",
      "Added object apple_0 to planning scene\n",
      "[DEBUG] [1701957535.943885, 5474.632000]: connecting to chao-desktop 34045\n",
      "Added object plate_0 to planning scene\n",
      "[DEBUG] [1701957535.996009, 5474.684000]: connecting to chao-desktop 34045\n",
      "Added object bowl_0 to planning scene\n"
     ]
    }
   ],
   "source": [
    "# Run object detection for the listed categories.\n",
    "# The recorded output shows the detected objects also being added to the planning scene.\n",
    "env.detect_objects(object_list=[\"apple\", \"plate\", \"bowl\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['apple_0', 'plate_0', 'bowl_0']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Call the scene's 3D bounding-box visualization, then list the object names\n",
    "# (the last expression is displayed as the cell result).\n",
    "env.scene.visualize_3d_bboxes()\n",
    "env.get_obj_name_list()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[DEBUG] [1701957538.466100, 5477.153000]: connecting to chao-desktop 34045\n",
      "Added object apple_0 to planning scene\n",
      "[DEBUG] [1701957538.497279, 5477.184000]: connecting to chao-desktop 34045\n",
      "Added object plate_0 to planning scene\n",
      "[DEBUG] [1701957538.545757, 5477.233000]: connecting to chao-desktop 34045\n",
      "Added object bowl_0 to planning scene\n"
     ]
    }
   ],
   "source": [
    "# Add the scene objects to the MoveIt planning scene.\n",
    "# NOTE(review): the detect_objects output already reports these objects as added - confirm whether this call is still needed.\n",
    "env.add_scene_objects_to_moveit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[INFO] [1701957538.631406, 5477.313000]: Sending perception data to grasp detection service\n",
      "[DEBUG] [1701957538.634861, 5477.321000]: connecting to chao-desktop 37135\n"
     ]
    }
   ],
   "source": [
    "# Compute a grasp pose for 'apple_0'; the recorded log shows perception data being sent to the grasp detection service.\n",
    "grasp_pose = env.parse_adaptive_shape_grasp_pose('apple_0')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['apple_0', 'bowl_0', 'plate_0']"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List the object names currently known to the planning scene.\n",
    "env.planning_scene.get_known_object_names()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'apple_0': {}, 'plate_0': {}, 'bowl_0': {}}"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the environment's object dictionary (object name -> per-object entry).\n",
    "env.objects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[DEBUG] [1701957541.134768, 5479.819000]: Transitioning to ACTIVE (from WAITING_FOR_GOAL_ACK, goal: /jupyter_multimodal-2-5479.818)\n",
      "[DEBUG] [1701957541.170045, 5479.853000]: Transitioning to WAITING_FOR_RESULT (from ACTIVE, goal: /jupyter_multimodal-2-5479.818)\n",
      "[DEBUG] [1701957541.171078, 5479.855000]: Transitioning to DONE (from WAITING_FOR_RESULT, goal: /jupyter_multimodal-2-5479.818)\n",
      "[DEBUG] [1701957541.172121, 5479.856000]: Waiting for rviz to update\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[WARN] [1701957542.225198, 5480.908000]: MoveitEnv: Could not plan cartesian_path to target pose \n",
      "position: \n",
      "  x: -0.27649660290796935\n",
      "  y: -0.17286453834982995\n",
      "  z: 1.1564431072901833\n",
      "orientation: \n",
      "  x: 0.6993529206633681\n",
      "  y: -0.6677615559119899\n",
      "  z: 0.1843941981573167\n",
      "  w: 0.17606469405174707.\n",
      " Plan accuracy: 0.6086956521739131\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[DEBUG] [1701957552.459397, 5491.136000]: Transitioning to ACTIVE (from WAITING_FOR_GOAL_ACK, goal: /jupyter_multimodal-3-5491.133)\n",
      "[DEBUG] [1701957552.494185, 5491.170000]: Transitioning to WAITING_FOR_RESULT (from ACTIVE, goal: /jupyter_multimodal-3-5491.133)\n",
      "[DEBUG] [1701957552.494934, 5491.171000]: Transitioning to DONE (from WAITING_FOR_RESULT, goal: /jupyter_multimodal-3-5491.133)\n",
      "[DEBUG] [1701957552.496824, 5491.172000]: connecting to chao-desktop 34045\n",
      "[DEBUG] [1701957552.498550, 5491.175000]: Waiting for rviz to update\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Open the gripper, then execute the grasp at the pose computed earlier.\n",
    "# The return value of the grasp call is displayed as the cell result.\n",
    "env.open_gripper()\n",
    "env.grasp(grasp_pose)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[DEBUG] [1701957555.048203, 5493.723000]: Transitioning to ACTIVE (from WAITING_FOR_GOAL_ACK, goal: /jupyter_multimodal-4-5493.722)\n",
      "[DEBUG] [1701957555.550191, 5494.224000]: Transitioning to WAITING_FOR_RESULT (from ACTIVE, goal: /jupyter_multimodal-4-5493.722)\n",
      "[DEBUG] [1701957555.551868, 5494.226000]: Transitioning to DONE (from WAITING_FOR_RESULT, goal: /jupyter_multimodal-4-5493.722)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Close the gripper, then attach 'apple_0' (presumably to the gripper in the planning scene - confirm).\n",
    "env.close_gripper()\n",
    "env.attach_object('apple_0')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[DEBUG] [1701957555.593313, 5494.248000]: Waiting for rviz to update\n"
     ]
    }
   ],
   "source": [
    "# Place sequence: compute a pose for placing 'apple_0' relative to 'plate_0', move there,\n",
    "# open the gripper to release, and detach the object.\n",
    "# NOTE(review): the result of move_to_pose is not checked before the gripper opens.\n",
    "place_pose = env.parse_place_pose('apple_0', 'plate_0')\n",
    "env.move_to_pose(place_pose)\n",
    "env.open_gripper()\n",
    "env.detach_object('apple_0')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ROS Python 3 (ipykernel)",
   "language": "python",
   "name": "ros_python"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
